rm(list=ls())

0.1 Importing library

## Importing library
### List of required packages
required_packages <- c("tidyverse","janitor" ,"readr","dplyr","haven","sf", "flextable","sp", "factoextra", "FactoMineR","gtsummary", "sjPlot", "fastDummies","ggthemes","spdep","patchwork")

# Check if packages are installed
missing_packages <- setdiff(required_packages, installed.packages()[,"Package"])

### Install missing packages
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

### Load all packages
lapply(required_packages, library, character.only = TRUE)
# Read shapefile data for 2013 and 2013


MPI_data_dr_2013 <- sf::read_sf(paste0(here::here(),"/output/output_data/MPI_data_dr_2013.shp"))
MPI_data_dr_2002 <- sf::read_sf(paste0(here::here(),"/output/output_data/MPI_data_dr_2002.shp"))
MPI_data_dr <- MPI_data_dr_2013 %>%
  plyr::rbind.fill(MPI_data_dr_2002) %>% 
  st_as_sf()

1 Regression modeling with Guediawaye Census data

#"hh_size",
predictors = c("menage","Aucun","Primair","Moyen","Secondr","fertlty","Ltrcy_F","nbr_cm_h","nbr_cm_f","pct_cm_f")
outcome = "MPI_men"

1.1 Inspecting the outcome variable (MPI) with visualization

mhv_map <- ggplot(MPI_data_dr_2013, aes(fill = MPI_men)) + 
  geom_sf(color = NA) + 
  scale_fill_viridis_c() + 
  theme_void() + 
  labs(fill = "MPI ")

mhv_histogram <- ggplot(MPI_data_dr_2013, aes(x = MPI_men)) + 
  geom_histogram(alpha = 0.5, fill = "navy", color = "navy",
                 bins = 100) + 
  theme_minimal() + 
  scale_x_continuous(labels = scales::label_number_si(accuracy = 0.1)) + 
  labs(x = "MPI")

mhv_map + mhv_histogram + labs(title = "MPI value charts for Guediawaye Census 2013")

MPI_data_dr %>% 
    
      ggplot(aes(fill = MPI_men)) + 
      
      geom_sf(color = NA) + 
  scale_fill_viridis_c() + 
  theme_void() + 
  labs(fill = "MPI ")+
       
      facet_wrap(~RGPH) +
      
      # Adjusting the plot theme
      theme_map(base_size = 8) +
      theme(panel.background = element_rect(),
            legend.background = element_blank(),
            axis.ticks = element_blank(),
            axis.text = element_blank(),
            legend.position = "bottom", 
            text = element_text(size = 8), 
            panel.grid = element_line(color = "white", size = 0.8))

ggplot(MPI_data_dr, aes(x = MPI_men)) + 
  geom_histogram(alpha = 0.5, fill = "navy", color = "navy",
                 bins = 100) + 
  theme_minimal() + 
  scale_x_continuous(labels = scales::label_number_si(accuracy = 0.1)) + 
  labs(x = "MPI")+
       
      facet_wrap(~RGPH) +
      
      # Adjusting the plot theme
      theme_map(base_size = 8) +
      theme(panel.background = element_rect(),
            legend.background = element_blank(),
            axis.ticks = element_blank(),
            axis.text = element_blank(),
            legend.position = "bottom", 
            text = element_text(size = 8), 
            panel.grid = element_line(color = "white", size = 0.8))

mhv_map_log <- ggplot(MPI_data_dr_2013, aes(fill = log(MPI_men))) + 
  geom_sf(color = NA) + 
  scale_fill_viridis_c() + 
  theme_void() + 
  labs(fill = "MPI\nvalue (log)")

mhv_histogram_log <- ggplot(MPI_data_dr_2013, aes(x = log(MPI_men))) + 
  geom_histogram(alpha = 0.5, fill = "navy", color = "navy",
                 bins = 100) + 
  theme_minimal() + 
  scale_x_continuous() + 
  labs(x = "MPI (log)")

mhv_map_log + mhv_histogram_log + labs(title = "Logged MPI value charts for Guediawaye Census 2013")

MPI_data_dr %>% 
    
      ggplot(aes(fill = log(MPI_men))) + 
      
      geom_sf(color = NA) + 
  scale_fill_viridis_c() + 
  theme_void() + 
  labs(fill = "MPI\nvalue (log)")+
       
      facet_wrap(~RGPH) +
      
      # Adjusting the plot theme
      theme_map(base_size = 8) +
      theme(panel.background = element_rect(),
            legend.background = element_blank(),
            axis.ticks = element_blank(),
            axis.text = element_blank(),
            legend.position = "bottom", 
            text = element_text(size = 8), 
            panel.grid = element_line(color = "white", size = 0.8))

1.2 A first regression model

library(sf)
library(units)
predictors = c("menage","Primair","Moyen","Secondr","Ltrcy_F","pct_cm_f","hh_size","Ltrcy_A","Ltrcy_W","Ltrcy_M","fertlty")
outcome = "MPI_men"
MPI_data_dr_2013_for_model<- MPI_data_dr_2013 %>%
  dplyr::select(MPI_men,predictors) %>% 
  mutate(pop_density = as.numeric(set_units(hh_size / st_area(.), "1/km2"))) %>% 
  dplyr::select(-hh_size)

##
MPI_data_dr_2002_for_model<- MPI_data_dr_2002 %>%
  dplyr::select(MPI_men,predictors) %>% 
  mutate(pop_density = as.numeric(set_units(hh_size / st_area(.), "1/km2"))) %>% 
  dplyr::select(-hh_size)
formula <- "log(MPI_men) ~ menage  + Primair + Moyen + Secondr + Ltrcy_F + pct_cm_f + pop_density + Ltrcy_A + Ltrcy_W + Ltrcy_M + fertlty"

model_2013 <- lm(formula = formula, data = MPI_data_dr_2013_for_model)
summary(model_2013)
## 
## Call:
## lm(formula = formula, data = MPI_data_dr_2013_for_model)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.9660 -0.1522  0.0151  0.1668  0.9155 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.206e+00  7.881e-02 -27.993  < 2e-16 ***
## menage       8.920e-03  8.090e-04  11.026  < 2e-16 ***
## Primair      4.489e-05  3.742e-04   0.120 0.904572    
## Moyen       -4.062e-03  8.660e-04  -4.690 3.69e-06 ***
## Secondr     -5.420e-03  1.042e-03  -5.202 3.09e-07 ***
## Ltrcy_F     -1.080e-03  2.616e-04  -4.130 4.37e-05 ***
## pct_cm_f    -2.412e-01  1.605e-01  -1.502 0.133743    
## pop_density  3.315e-06  8.562e-07   3.872 0.000125 ***
## Ltrcy_A      8.242e-04  3.680e-04   2.240 0.025623 *  
## Ltrcy_W      3.519e-04  3.775e-04   0.932 0.351822    
## Ltrcy_M     -5.074e-03  1.121e-02  -0.453 0.651031    
## fertlty      1.192e-04  7.926e-05   1.504 0.133427    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2763 on 423 degrees of freedom
## Multiple R-squared:  0.5892, Adjusted R-squared:  0.5785 
## F-statistic: 55.16 on 11 and 423 DF,  p-value: < 2.2e-16
###
model_2002 <- lm(formula = formula, data = MPI_data_dr_2002_for_model)

summary(model_2002)
## 
## Call:
## lm(formula = formula, data = MPI_data_dr_2002_for_model)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.93801 -0.14626  0.02407  0.15328  0.92920 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.502e+00  9.521e-02 -26.277  < 2e-16 ***
## menage       5.094e-03  6.425e-04   7.929 6.84e-14 ***
## Primair     -5.197e-04  9.546e-04  -0.544 0.586581    
## Moyen       -4.774e-03  1.305e-03  -3.659 0.000307 ***
## Secondr     -3.697e-03  1.733e-03  -2.133 0.033870 *  
## Ltrcy_F      6.215e-04  1.007e-03   0.617 0.537825    
## pct_cm_f     9.499e-02  2.789e-01   0.341 0.733699    
## pop_density -7.412e-07  8.743e-07  -0.848 0.397346    
## Ltrcy_A      3.452e-04  9.737e-05   3.545 0.000467 ***
## Ltrcy_W      2.135e-03  7.068e-04   3.021 0.002772 ** 
## Ltrcy_M     -1.789e-02  1.212e-02  -1.477 0.140927    
## fertlty      4.018e-05  5.000e-05   0.804 0.422424    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2598 on 256 degrees of freedom
## Multiple R-squared:  0.6361, Adjusted R-squared:  0.6205 
## F-statistic: 40.69 on 11 and 256 DF,  p-value: < 2.2e-16
library(corrr)

dfw_estimates_2013 <- MPI_data_dr_2013_for_model%>%
  select(-MPI_men) %>%
  st_drop_geometry()

correlations <- correlate(dfw_estimates_2013, method = "pearson")
network_plot(correlations) + labs(title = "Network plot of correlations between model predictors for Guediawaye Census 2013")

##
dfw_estimates_2002 <- MPI_data_dr_2002_for_model%>%
  select(-MPI_men) %>%
  st_drop_geometry()

correlations <- correlate(dfw_estimates_2002, method = "pearson")
network_plot(correlations) + labs(title = "Network plot of correlations between model predictors for Guediawaye Census 2002")

library(car)

vif(model_2013)
##      menage     Primair       Moyen     Secondr     Ltrcy_F    pct_cm_f 
##    3.871359    6.364877    8.106134    6.016109    7.975205    1.078805 
## pop_density     Ltrcy_A     Ltrcy_W     Ltrcy_M     fertlty 
##    1.310951    1.753154    1.482194    1.495710    5.367791
vif(model_2002)
##      menage     Primair       Moyen     Secondr     Ltrcy_F    pct_cm_f 
##    3.245263  296.905235   62.394454   23.511511  676.243716    1.192264 
## pop_density     Ltrcy_A     Ltrcy_W     Ltrcy_M     fertlty 
##    5.093219    4.316810    2.813059    2.154716   12.816729

1.3 Dimension reduction with principal components analysis

pca_2013 <- prcomp(
  formula = ~., 
  data = dfw_estimates_2013, 
  scale. = TRUE, 
  center = TRUE
)

summary(pca_2013)
## Importance of components:
##                           PC1    PC2    PC3    PC4     PC5     PC6     PC7
## Standard deviation     2.1830 1.2702 1.1990 1.0093 0.86570 0.70864 0.63550
## Proportion of Variance 0.4332 0.1467 0.1307 0.0926 0.06813 0.04565 0.03671
## Cumulative Proportion  0.4332 0.5799 0.7106 0.8032 0.87133 0.91698 0.95369
##                            PC8     PC9    PC10    PC11
## Standard deviation     0.46543 0.35450 0.28978 0.28829
## Proportion of Variance 0.01969 0.01142 0.00763 0.00756
## Cumulative Proportion  0.97339 0.98481 0.99244 1.00000
##
pca_2002 <- prcomp(
  formula = ~., 
  data = dfw_estimates_2002, 
  scale. = TRUE, 
  center = TRUE
)

summary(pca_2002)
## Importance of components:
##                          PC1    PC2    PC3     PC4    PC5     PC6     PC7
## Standard deviation     2.484 1.2669 1.1310 0.89828 0.6902 0.51195 0.42413
## Proportion of Variance 0.561 0.1459 0.1163 0.07335 0.0433 0.02383 0.01635
## Cumulative Proportion  0.561 0.7069 0.8232 0.89657 0.9399 0.96370 0.98005
##                            PC8     PC9    PC10    PC11
## Standard deviation     0.34446 0.24919 0.19411 0.03123
## Proportion of Variance 0.01079 0.00565 0.00343 0.00009
## Cumulative Proportion  0.99084 0.99649 0.99991 1.00000
pca_2013_tibble <- pca_2013$rotation %>%
  as_tibble(rownames = "predictor")
pca_2013_tibble
## # A tibble: 11 × 12
##    predictor       PC1      PC2     PC3      PC4     PC5    PC6      PC7     PC8
##    <chr>         <dbl>    <dbl>   <dbl>    <dbl>   <dbl>  <dbl>    <dbl>   <dbl>
##  1 menage      0.411    0.0273   0.0207 -0.00165 -0.116   0.171 -0.100   -0.860 
##  2 Primair     0.391    0.00136  0.292  -0.134   -0.0931  0.278 -0.182    0.321 
##  3 Moyen       0.423    0.0591  -0.169  -0.0299  -0.123  -0.105  0.0880   0.292 
##  4 Secondr     0.359    0.0776  -0.404   0.0669  -0.0637 -0.350  0.241   -0.0562
##  5 Ltrcy_F     0.419   -0.0249  -0.140   0.0607   0.118  -0.310  0.179    0.148 
##  6 pct_cm_f   -0.0501  -0.160    0.0499 -0.944    0.0273 -0.226  0.135   -0.0901
##  7 Ltrcy_A     0.187   -0.242    0.385   0.127    0.804  -0.193 -0.00979 -0.0681
##  8 Ltrcy_W     0.00824 -0.670   -0.121   0.0919  -0.249  -0.329 -0.598    0.0156
##  9 Ltrcy_M     0.00631 -0.670   -0.169   0.0737  -0.0179  0.453  0.556    0.0105
## 10 fertlty     0.397   -0.0175   0.210  -0.142   -0.0923  0.369 -0.195    0.177 
## 11 pop_densi…  0.00230 -0.0771   0.682   0.178   -0.477  -0.355  0.370   -0.0462
## # ℹ 3 more variables: PC9 <dbl>, PC10 <dbl>, PC11 <dbl>
###
pca_2002_tibble <- pca_2002$rotation %>%
  as_tibble(rownames = "predictor")
pca_2002_tibble
## # A tibble: 11 × 12
##    predictor       PC1     PC2     PC3      PC4     PC5     PC6     PC7      PC8
##    <chr>         <dbl>   <dbl>   <dbl>    <dbl>   <dbl>   <dbl>   <dbl>    <dbl>
##  1 menage       0.317  -0.151   0.0760  1.53e-1  0.743  -0.386   0.0986 -0.334  
##  2 Primair      0.386  -0.0977 -0.0903  1.77e-2  0.0916  0.125   0.0829  0.551  
##  3 Moyen        0.366   0.0740  0.250   2.23e-1 -0.182   0.0868 -0.0602  0.0971 
##  4 Secondr      0.259   0.199   0.519   3.79e-1 -0.230   0.162  -0.0422 -0.401  
##  5 Ltrcy_F      0.394  -0.0113  0.0838  1.19e-1 -0.0357  0.120   0.0212  0.326  
##  6 pct_cm_f    -0.0469 -0.168  -0.612   7.50e-1 -0.0961  0.0674 -0.0988 -0.0935 
##  7 Ltrcy_A      0.342  -0.0715 -0.174  -2.34e-1 -0.311  -0.481  -0.664  -0.115  
##  8 Ltrcy_W      0.190   0.586  -0.273  -7.74e-6 -0.233  -0.446   0.540  -0.00999
##  9 Ltrcy_M      0.0267  0.707  -0.206  -4.56e-2  0.388   0.348  -0.425  -0.0105 
## 10 fertlty      0.373  -0.141  -0.163  -1.40e-1  0.140   0.135   0.0543  0.0967 
## 11 pop_density  0.316  -0.157  -0.315  -3.60e-1 -0.148   0.462   0.229  -0.526  
## # ℹ 3 more variables: PC9 <dbl>, PC10 <dbl>, PC11 <dbl>
pca_2013_tibble %>%
  select(predictor:PC5) %>%
  pivot_longer(PC1:PC5, names_to = "component", values_to = "value") %>%
  ggplot(aes(x = value, y = predictor)) + 
  geom_col(fill = "darkgreen", color = "darkgreen", alpha = 0.5) + 
  facet_wrap(~component, nrow = 1) + 
  labs(y = NULL, x = "Value", title = " Loadings for first five principal components  for Guediawaye Census 2013") + 
  theme_minimal()

pca_2002_tibble %>%
  select(predictor:PC5) %>%
  pivot_longer(PC1:PC5, names_to = "component", values_to = "value") %>%
  ggplot(aes(x = value, y = predictor)) + 
  geom_col(fill = "darkgreen", color = "darkgreen", alpha = 0.5) + 
  facet_wrap(~component, nrow = 1) + 
  labs(y = NULL, x = "Value", title = " Loadings for first five principal components  for Guediawaye Census 2002") + 
  theme_minimal()

components_2013 <- predict(pca_2013, dfw_estimates_2013)

dfw_pca_2013 <- MPI_data_dr_2013_for_model%>%
  select(MPI_men) %>%
  cbind(components_2013) 

ggplot(dfw_pca_2013, aes(fill = PC1)) +
  geom_sf(color = NA) +
  labs(title = "Map of principal component 1 for Guediawaye Census 2013") +
  theme_void() +
  scale_fill_viridis_c()

components_2002 <- predict(pca_2002, dfw_estimates_2002)

dfw_pca_2002 <- MPI_data_dr_2002_for_model%>%
  select(MPI_men) %>%
  cbind(components_2002) 

ggplot(dfw_pca_2002, aes(fill = PC1)) +
  geom_sf(color = NA) +
  labs(title = "Map of principal component 1 for Guediawaye Census 2002") +
  theme_void() +
  scale_fill_viridis_c()

dfw_pca_2002$RGPH<-"Census 2002"
dfw_pca_2013$RGPH<-"Census 2013"
dfw_pca <- dfw_pca_2002 %>%
  plyr::rbind.fill(dfw_pca_2013) %>% 
  st_as_sf()


dfw_pca %>% 
    
      ggplot(aes(fill = PC1))  + 
      
      # Adding spatial features to the plot
      geom_sf(color = NA) +
       scale_fill_viridis_c()+
      facet_wrap(~RGPH) +
      
      # Adjusting the plot theme
      theme_map(base_size = 8) +
      theme(panel.background = element_rect(),
            legend.background = element_blank(),
            axis.ticks = element_blank(),
            axis.text = element_blank(),
            legend.position = "bottom", 
            text = element_text(size = 8), 
            panel.grid = element_line(color = "white", size = 0.8))

dfw_pca %>% 
    
      ggplot(aes(fill = PC2))  + 
      
      # Adding spatial features to the plot
      geom_sf(color = NA) +
       scale_fill_viridis_c()+
      facet_wrap(~RGPH) +
      
      # Adjusting the plot theme
      theme_map(base_size = 8) +
      theme(panel.background = element_rect(),
            legend.background = element_blank(),
            axis.ticks = element_blank(),
            axis.text = element_blank(),
            legend.position = "bottom", 
            text = element_text(size = 8), 
            panel.grid = element_line(color = "white", size = 0.8))

pca_2013_formula <- paste0("log(MPI_men) ~ ", 
                      paste0('PC', 1:6, collapse = ' + '))

pca_2013_model <- lm(formula = pca_2013_formula, data = dfw_pca_2013)

summary(pca_2013_model)
## 
## Call:
## lm(formula = pca_2013_formula, data = dfw_pca_2013)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.24987 -0.18277  0.02653  0.20470  1.04081 
## 
## Coefficients:
##              Estimate Std. Error  t value Pr(>|t|)    
## (Intercept) -2.198765   0.015293 -143.780  < 2e-16 ***
## PC1         -0.053705   0.007013   -7.657 1.28e-13 ***
## PC2         -0.028598   0.012053   -2.373   0.0181 *  
## PC3          0.190360   0.012769   14.908  < 2e-16 ***
## PC4          0.012829   0.015169    0.846   0.3982    
## PC5         -0.024572   0.017685   -1.389   0.1654    
## PC6          0.161101   0.021605    7.457 4.97e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.319 on 428 degrees of freedom
## Multiple R-squared:  0.4461, Adjusted R-squared:  0.4384 
## F-statistic: 57.46 on 6 and 428 DF,  p-value: < 2.2e-16
###
pca_2002_formula <- paste0("log(MPI_men) ~ ", 
                      paste0('PC', 1:6, collapse = ' + '))

pca_2002_model <- lm(formula = pca_2002_formula, data = dfw_pca_2002)

summary(pca_2002_model)
## 
## Call:
## lm(formula = pca_2002_formula, data = dfw_pca_2002)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.85562 -0.14210  0.02181  0.16672  0.90081 
## 
## Coefficients:
##              Estimate Std. Error  t value Pr(>|t|)    
## (Intercept) -2.372798   0.016131 -147.095  < 2e-16 ***
## PC1         -0.034208   0.006506   -5.258 3.03e-07 ***
## PC2         -0.077519   0.012757   -6.077 4.33e-09 ***
## PC3         -0.184749   0.014289  -12.929  < 2e-16 ***
## PC4         -0.118827   0.017991   -6.605 2.22e-10 ***
## PC5          0.209272   0.023416    8.937  < 2e-16 ***
## PC6         -0.254087   0.031568   -8.049 2.96e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2641 on 261 degrees of freedom
## Multiple R-squared:  0.6167, Adjusted R-squared:  0.6079 
## F-statistic:    70 on 6 and 261 DF,  p-value: < 2.2e-16

1.4 Spatial regression

MPI_data_dr_2013_for_model$residuals <- residuals(model_2013)

ggplot(MPI_data_dr_2013_for_model, aes(x = residuals)) + 
  geom_histogram(bins = 100, alpha = 0.5, color = "navy",
                 fill = "navy") + 
  labs(title = "Distribution of model residuals for Guediawaye Census 2013") +
  theme_minimal()

MPI_data_dr_2002_for_model$residuals <- residuals(model_2002)

ggplot(MPI_data_dr_2002_for_model, aes(x = residuals)) + 
  geom_histogram(bins = 100, alpha = 0.5, color = "navy",
                 fill = "navy") + 
  labs(title = "Distribution of model residuals for Guediawaye Census 2002") +
  theme_minimal()

MPI_data_dr_for_model <- MPI_data_dr_2002_for_model %>%
  dplyr::mutate(RGPH = "Census 2002") %>% 
  plyr::rbind.fill(MPI_data_dr_2013_for_model %>%
  dplyr::mutate(RGPH = "Census 2013"))

ggplot(MPI_data_dr_for_model, aes(x = residuals)) + 
      geom_histogram(bins = 100, alpha = 0.5, color = "navy",
                 fill = "navy") +
       
      facet_wrap(~RGPH) +
      
      # Adjusting the plot theme
      theme_map(base_size = 8) +
      theme(panel.background = element_rect(),
            legend.background = element_blank(),
            axis.ticks = element_blank(),
            axis.text = element_blank(),
            legend.position = "bottom", 
            text = element_text(size = 8), 
            panel.grid = element_line(color = "white", size = 0.8))

library(spdep)

wts_2013 <- MPI_data_dr_2013_for_model %>%
  poly2nb() %>%
  nb2listw()

moran.test(MPI_data_dr_2013_for_model$residuals, wts_2013)
## 
##  Moran I test under randomisation
## 
## data:  MPI_data_dr_2013_for_model$residuals  
## weights: wts_2013    
## 
## Moran I statistic standard deviate = 4.7117, p-value = 1.228e-06
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.1325997182     -0.0023041475      0.0008197714
wts_2002 <- MPI_data_dr_2002_for_model %>%
  poly2nb() %>%
  nb2listw()

moran.test(MPI_data_dr_2002_for_model$residuals, wts_2002)
## 
##  Moran I test under randomisation
## 
## data:  MPI_data_dr_2002_for_model$residuals  
## weights: wts_2002    
## 
## Moran I statistic standard deviate = 2.1911, p-value = 0.01422
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.077202122      -0.003745318       0.001364795
MPI_data_dr_2013_for_model$lagged_residuals <- lag.listw(wts_2013, MPI_data_dr_2013_for_model$residuals)

ggplot(MPI_data_dr_2013_for_model, aes(x = residuals, y = lagged_residuals)) + 
  theme_minimal() + 
  labs(title = "Moran scatterplot of residual spatial autocorrelation for Guediawaye Census 2013") +
  geom_point(alpha = 0.5) + 
  geom_smooth(method = "lm", color = "red")

MPI_data_dr_2002_for_model$lagged_residuals <- lag.listw(wts_2002, MPI_data_dr_2002_for_model$residuals)

ggplot(MPI_data_dr_2002_for_model, aes(x = residuals, y = lagged_residuals)) + 
  theme_minimal() + 
  labs(title = "Moran scatterplot of residual spatial autocorrelation for Guediawaye Census 2002") +
  geom_point(alpha = 0.5) + 
  geom_smooth(method = "lm", color = "red")

MPI_data_dr_for_model <- MPI_data_dr_2002_for_model %>%
  dplyr::mutate(RGPH = "Census 2002") %>% 
  plyr::rbind.fill(MPI_data_dr_2013_for_model %>%
  dplyr::mutate(RGPH = "Census 2013"))

ggplot(MPI_data_dr_for_model, aes(x = residuals, y = lagged_residuals)) + 
  geom_point(alpha = 0.5) + 
  geom_smooth(method = "lm", color = "red")+
      facet_wrap(~RGPH) +
      
      # Adjusting the plot theme
      theme_map(base_size = 8) +
      theme(panel.background = element_rect(),
            legend.background = element_blank(),
            axis.ticks = element_blank(),
            axis.text = element_blank(),
            legend.position = "bottom", 
            text = element_text(size = 8), 
            panel.grid = element_line(color = "white", size = 0.8))

2 Geographically weighted regression

2.1 Choosing a bandwidth for GWR

library(GWmodel)
library(sf)

dfw_data_sp_2013 <- MPI_data_dr_2013_for_model%>%
  as_Spatial()

bw <- bw.gwr(
  formula = formula, 
  data = dfw_data_sp_2013, 
  kernel = "bisquare",
  adaptive = TRUE
)
## Adaptive bandwidth: 276 CV score: 33.03686 
## Adaptive bandwidth: 179 CV score: 32.93194 
## Adaptive bandwidth: 117 CV score: 34.2892 
## Adaptive bandwidth: 215 CV score: 32.81678 
## Adaptive bandwidth: 240 CV score: 32.88297 
## Adaptive bandwidth: 202 CV score: 32.80238 
## Adaptive bandwidth: 191 CV score: 32.84514 
## Adaptive bandwidth: 205 CV score: 32.78626 
## Adaptive bandwidth: 211 CV score: 32.81625 
## Adaptive bandwidth: 205 CV score: 32.78626
###
dfw_data_sp_2002 <- MPI_data_dr_2002_for_model%>%
  as_Spatial()

bw <- bw.gwr(
  formula = formula, 
  data = dfw_data_sp_2002, 
  kernel = "bisquare",
  adaptive = TRUE
)
## Adaptive bandwidth: 173 CV score: 30.77821 
## Adaptive bandwidth: 115 CV score: 22.62838 
## Adaptive bandwidth: 78 CV score: 18.01934 
## Adaptive bandwidth: 56 CV score: 20.35081 
## Adaptive bandwidth: 92 CV score: 18.83631 
## Adaptive bandwidth: 69 CV score: 18.26553 
## Adaptive bandwidth: 83 CV score: 18.02488 
## Adaptive bandwidth: 74 CV score: 18.01254 
## Adaptive bandwidth: 72 CV score: 18.08195 
## Adaptive bandwidth: 75 CV score: 17.91883 
## Adaptive bandwidth: 76 CV score: 17.96413 
## Adaptive bandwidth: 74 CV score: 18.01254 
## Adaptive bandwidth: 75 CV score: 17.91883
gw_model_2013 <- gwr.basic(
  formula = formula, 
  data = dfw_data_sp_2013, 
  bw = bw,
  kernel = "bisquare",
  adaptive = TRUE
)

###
gw_model_2002 <- gwr.basic(
  formula = formula, 
  data = dfw_data_sp_2002, 
  bw = bw,
  kernel = "bisquare",
  adaptive = TRUE
)
names(gw_model_2013)
## [1] "GW.arguments"  "GW.diagnostic" "lm"            "SDF"          
## [5] "timings"       "this.call"     "Ftests"
##
names(gw_model_2002)
## [1] "GW.arguments"  "GW.diagnostic" "lm"            "SDF"          
## [5] "timings"       "this.call"     "Ftests"
gw_model_2013_results <- gw_model_2013$SDF %>%
  st_as_sf() 

names(gw_model_2013_results)
##  [1] "Intercept"      "menage"         "Primair"        "Moyen"         
##  [5] "Secondr"        "Ltrcy_F"        "pct_cm_f"       "pop_density"   
##  [9] "Ltrcy_A"        "Ltrcy_W"        "Ltrcy_M"        "fertlty"       
## [13] "y"              "yhat"           "residual"       "CV_Score"      
## [17] "Stud_residual"  "Intercept_SE"   "menage_SE"      "Primair_SE"    
## [21] "Moyen_SE"       "Secondr_SE"     "Ltrcy_F_SE"     "pct_cm_f_SE"   
## [25] "pop_density_SE" "Ltrcy_A_SE"     "Ltrcy_W_SE"     "Ltrcy_M_SE"    
## [29] "fertlty_SE"     "Intercept_TV"   "menage_TV"      "Primair_TV"    
## [33] "Moyen_TV"       "Secondr_TV"     "Ltrcy_F_TV"     "pct_cm_f_TV"   
## [37] "pop_density_TV" "Ltrcy_A_TV"     "Ltrcy_W_TV"     "Ltrcy_M_TV"    
## [41] "fertlty_TV"     "Local_R2"       "geometry"
###
gw_model_2002_results <- gw_model_2002$SDF %>%
  st_as_sf() 

names(gw_model_2002_results)
##  [1] "Intercept"      "menage"         "Primair"        "Moyen"         
##  [5] "Secondr"        "Ltrcy_F"        "pct_cm_f"       "pop_density"   
##  [9] "Ltrcy_A"        "Ltrcy_W"        "Ltrcy_M"        "fertlty"       
## [13] "y"              "yhat"           "residual"       "CV_Score"      
## [17] "Stud_residual"  "Intercept_SE"   "menage_SE"      "Primair_SE"    
## [21] "Moyen_SE"       "Secondr_SE"     "Ltrcy_F_SE"     "pct_cm_f_SE"   
## [25] "pop_density_SE" "Ltrcy_A_SE"     "Ltrcy_W_SE"     "Ltrcy_M_SE"    
## [29] "fertlty_SE"     "Intercept_TV"   "menage_TV"      "Primair_TV"    
## [33] "Moyen_TV"       "Secondr_TV"     "Ltrcy_F_TV"     "pct_cm_f_TV"   
## [37] "pop_density_TV" "Ltrcy_A_TV"     "Ltrcy_W_TV"     "Ltrcy_M_TV"    
## [41] "fertlty_TV"     "Local_R2"       "geometry"
ggplot(gw_model_2013_results, aes(fill = Local_R2)) + 
  geom_sf(color = NA) + 
  scale_fill_viridis_c() + 
  labs(title = "Local R-squared values from the GWR model for Guediawaye Census 2013") +
  theme_void()

ggplot(gw_model_2002_results, aes(fill = Local_R2)) + 
  geom_sf(color = NA) + 
  scale_fill_viridis_c() + 
  labs(title = "Local R-squared values from the GWR model for Guediawaye Census 2002") +
  theme_void()

gw_model_results <- gw_model_2002_results %>%
  dplyr::mutate(RGPH = "Census 2002") %>% 
  plyr::rbind.fill(gw_model_2013_results %>%
  dplyr::mutate(RGPH = "Census 2013")) %>% 
  st_as_sf()


gw_model_results %>% 
    
      ggplot(aes(fill = Local_R2)) + 
      
      # Adding spatial features to the plot
      geom_sf() +
      
      scale_fill_viridis_c() +
       
      facet_wrap(~RGPH) +
      
      # Adjusting the plot theme
      theme_map(base_size = 8) +
      theme(panel.background = element_rect(),
            legend.background = element_blank(),
            axis.ticks = element_blank(),
            axis.text = element_blank(),
            legend.position = "bottom", 
            text = element_text(size = 8), 
            panel.grid = element_line(color = "white", size = 0.8))

ggplot(gw_model_2013_results, aes(fill = menage)) + 
  geom_sf(color = NA) + 
  scale_fill_viridis_c() + 
  labs(title = "Local parameter estimates for household members for Guediawaye Census 2013") +
  theme_void() + 
  labs(fill = "Local β for \nHH")

ggplot(gw_model_2002_results, aes(fill = menage)) + 
  geom_sf(color = NA) + 
  scale_fill_viridis_c() + 
  labs(title = "Local parameter estimates for household members for Guediawaye Census 2002") +
  theme_void() + 
  labs(fill = "Local β for \nHH")

gw_model_results <- gw_model_2002_results %>%
  dplyr::mutate(RGPH = "Census 2002") %>% 
  plyr::rbind.fill(gw_model_2013_results %>%
  dplyr::mutate(RGPH = "Census 2013")) %>% 
  st_as_sf()


gw_model_results %>% 
    
      ggplot(aes(fill = menage)) + 
      
      # Adding spatial features to the plot
      geom_sf() +
      
      scale_fill_viridis_c() +
  labs(fill = "Local β for \nHH")+
      facet_wrap(~RGPH) +
      
      # Adjusting the plot theme
      theme_map(base_size = 8) +
      theme(panel.background = element_rect(),
            legend.background = element_blank(),
            axis.ticks = element_blank(),
            axis.text = element_blank(),
            legend.position = "bottom", 
            text = element_text(size = 8), 
            panel.grid = element_line(color = "white", size = 0.8))

ggplot(gw_model_2013_results, aes(fill = pop_density)) + 
  geom_sf(color = NA) + 
  scale_fill_viridis_c() + 
  labs(title = "Local parameter estimates for population density for Guediawaye Census 2013") +
  theme_void() + 
  labs(fill = "Local β for \npopulation density")

ggplot(gw_model_2002_results, aes(fill = pop_density)) + 
  geom_sf(color = NA) + 
  scale_fill_viridis_c() + 
  labs(title = "Local parameter estimates for population density for Guediawaye Census 2002") +
  theme_void() + 
  labs(fill = "Local β for \npopulation density")

gw_model_results <- gw_model_2002_results %>%
  dplyr::mutate(RGPH = "Census 2002") %>% 
  plyr::rbind.fill(gw_model_2013_results %>%
  dplyr::mutate(RGPH = "Census 2013")) %>% 
  st_as_sf()


gw_model_results %>% 
    
      ggplot(aes(fill = pop_density)) + 
      
      # Adding spatial features to the plot
      geom_sf() +
      
      scale_fill_viridis_c() +
  labs(fill = "Local β for \npopulation density")+
      facet_wrap(~RGPH) +
      
      # Adjusting the plot theme
      theme_map(base_size = 8) +
      theme(panel.background = element_rect(),
            legend.background = element_blank(),
            axis.ticks = element_blank(),
            axis.text = element_blank(),
            legend.position = "bottom", 
            text = element_text(size = 8), 
            panel.grid = element_line(color = "white", size = 0.8))

3 Classification and clustering of Guediawaye census data

3.1 Geodemographic classification

set.seed(1994)

dfw_kmeans_2013 <- dfw_pca_2013 %>%
  st_drop_geometry() %>%
  select(PC1:PC8) %>%
  kmeans(centers = 6)

table(dfw_kmeans_2013$cluster)
## 
##   1   2   3   4   5   6 
##  53  73 139  71  95   4
##
dfw_kmeans_2002 <- dfw_pca_2002 %>%
  st_drop_geometry() %>%
  select(PC1:PC8) %>%
  kmeans(centers = 6)

table(dfw_kmeans_2002$cluster)
## 
##  1  2  3  4  5  6 
## 71 38  3  3 85 68
dfw_clusters_2013 <- dfw_pca_2013 %>%
  mutate(cluster = as.character(dfw_kmeans_2013$cluster))

ggplot(dfw_clusters_2013, aes(fill = cluster)) + 
  geom_sf(size = 0.1) + 
  scale_fill_brewer(palette = "Set1") + 
  labs(title = "Map of geodemographic clusters for Guediawaye Census 2013") +
  theme_void() + 
  labs(fill = "Cluster ")

dfw_clusters_2002 <- dfw_pca_2002 %>%
  mutate(cluster = as.character(dfw_kmeans_2002$cluster))

ggplot(dfw_clusters_2002, aes(fill = cluster)) + 
  geom_sf(size = 0.1) + 
  scale_fill_brewer(palette = "Set1") + 
  labs(title = "Map of geodemographic clusters for Guediawaye Census 2002") +
  theme_void() + 
  labs(fill = "Cluster ")

dfw_clusters <- dfw_clusters_2002 %>%
  dplyr::mutate(RGPH = "Census 2002") %>% 
  plyr::rbind.fill(dfw_clusters_2013 %>%
  dplyr::mutate(RGPH = "Census 2013")) %>% 
  st_as_sf()


dfw_clusters %>% 
    
      ggplot(aes(fill = cluster)) + 
      
  geom_sf(size = 0.1) + 
  scale_fill_brewer(palette = "Set1") + 
      facet_wrap(~RGPH) +
      
      # Adjusting the plot theme
      theme_map(base_size = 8) +
      theme(panel.background = element_rect(),
            legend.background = element_blank(),
            axis.ticks = element_blank(),
            axis.text = element_blank(),
            legend.position = "bottom", 
            text = element_text(size = 8), 
            panel.grid = element_line(color = "white", size = 0.8))

library(plotly)

cluster_plot <- ggplot(dfw_clusters_2013, 
                       aes(x = PC1, y = PC2, color = cluster)) + 
  geom_point() + 
  scale_color_brewer(palette = "Set1") + 
  labs(title = "Interactive scatterplot of PC1 and PC2 colored by cluster for Guediawaye Census 2013") +
  theme_minimal()

ggplotly(cluster_plot) %>%
  layout(legend = list(orientation = "h", y = -0.15, 
                       x = 0.2, title = "Cluster"))
cluster_plot <- ggplot(dfw_clusters_2002, 
                       aes(x = PC1, y = PC2, color = cluster)) + 
  geom_point() + 
  scale_color_brewer(palette = "Set1") + 
  labs(title = "Interactive scatterplot of PC1 and PC2 colored by cluster for Guediawaye Census 2002") +
  theme_minimal()

ggplotly(cluster_plot) %>%
  layout(legend = list(orientation = "h", y = -0.15, 
                       x = 0.2, title = "Cluster"))

3.2 Spatial clustering & regionalization

library(spdep)

input_vars <- dfw_pca_2013 %>%
  select(PC1:PC8) %>%
  st_drop_geometry() %>%
  as.data.frame() 

skater_nbrs <- poly2nb(dfw_pca_2013, queen = TRUE)
costs <- nbcosts(skater_nbrs, input_vars)
skater_weights <- nb2listw(skater_nbrs, costs, style = "B")
mst <- mstree(skater_weights)

regions <- skater(
  mst[,1:2], 
  input_vars, 
  ncuts = 7,
  crit = 10
)
dfw_clusters_2013$region <- as.character(regions$group)

ggplot(dfw_clusters_2013, aes(fill = region)) + 
  geom_sf(size = 0.1) + 
  labs(title = "Map of contiguous regions derived with the SKATER algorithm for Guediawaye Census 2013") +
  scale_fill_brewer(palette = "Set1") + 
  theme_void()

input_vars <- dfw_pca_2002 %>%
  select(PC1:PC8) %>%
  st_drop_geometry() %>%
  as.data.frame() 

skater_nbrs <- poly2nb(dfw_pca_2002, queen = TRUE)
costs <- nbcosts(skater_nbrs, input_vars)
skater_weights <- nb2listw(skater_nbrs, costs, style = "B")
mst <- mstree(skater_weights)

regions <- skater(
  mst[,1:2], 
  input_vars, 
  ncuts = 7,
  crit = 10
)
dfw_clusters_2002$region <- as.character(regions$group)

ggplot(dfw_clusters_2002, aes(fill = region)) + 
  geom_sf(size = 0.1) + 
  labs(title = "Map of contiguous regions derived with the SKATER algorithm for Guediawaye Census 2002") +
  scale_fill_brewer(palette = "Set1") + 
  theme_void()

dfw_clusters <- dfw_clusters_2002 %>%
  dplyr::mutate(RGPH = "Census 2002") %>% 
  plyr::rbind.fill(dfw_clusters_2013 %>%
  dplyr::mutate(RGPH = "Census 2013")) %>% 
  st_as_sf()


dfw_clusters %>% 
    
      ggplot(aes(fill = region)) + 
      
  geom_sf(size = 0.1) + 
  scale_fill_brewer(palette = "Set2") + 
      facet_wrap(~RGPH) +
      
      # Adjusting the plot theme
      theme_map(base_size = 8) +
      theme(panel.background = element_rect(),
            #legend.background = element_blank(),
            axis.ticks = element_blank(),
            axis.text = element_blank(),
            legend.position = "bottom", 
            text = element_text(size = 8), 
            #panel.grid = element_line(color = "white", size = 0.8),
            legend.box.background = element_rect(),
            legend.box.margin = margin(6, 6, 6, 6),
            plot.background = element_rect(fill = "white"))+
  ggspatial::annotation_scale(
    location = "br",
    bar_cols = c("grey60", "white"),
    text_family = "ArcherPro Book"
  ) +
  ggspatial::annotation_north_arrow(
    location = "tl", which_north = "true",
    #pad_x = unit(0.4, "in"), pad_y = unit(0.4, "in"),
    #which_north = "grid",
  height = unit(1, "cm"),
  width = unit(1, "cm"),
  )